#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Created on 16/7/17

import sys,os,re

# Python 2 only: site.py deletes sys.setdefaultencoding at interpreter
# start-up, and reload(sys) brings it back.  Making UTF-8 the default codec
# lets the implicit str <-> unicode conversions in this script (byte-string
# paths mixed with decoded file-name parts) handle non-ASCII names instead of
# raising UnicodeDecodeError with the stock ASCII default.
reload(sys)
sys.setdefaultencoding('utf-8')

__author__ = "troyld"


def get_all_files(dir):
    """Return the names of all files below *dir* that contain '章'.

    The tree is traversed recursively with os.walk.  Only the bare file
    names are returned; the directory each file was found in is not part of
    the result.
    """
    return [
        name
        for _root, _subdirs, names in os.walk(dir)
        for name in names
        if '章' in name
    ]

# Value of each Chinese digit character.
num_dict = {
    u'一':1,
    u'两':2,
    u'二':2,
    u'三':3,
    u'四':4,
    u'五':5,
    u'六':6,
    u'七':7,
    u'八':8,
    u'九':9,
    u'零':0,
}

# Multiplier of each Chinese unit character.
unit_dict = {
    u'十':10,
    u'百':100,
    u'千':1000
}

def hanzi2num(hanzi):
    """Convert a Chinese numeral (units up to 千) to a decimal string.

    Characters found in neither num_dict nor unit_dict (for example a
    leading "第") are ignored, so the chapter prefix can be passed as is.
    ASCII digits are accepted as well, so an already numeric prefix such as
    u"第12" yields "12".

    Rules applied while scanning left to right:
      * a run of digits with no unit in between is read positionally
        (u"二零一六" -> 2016);
      * a unit multiplies the digits read since the previous unit;
      * a unit with no non-zero digit in front of it counts once
        (u"十" -> 10, u"百" -> 100, u"一百十五" -> 115).

    :param hanzi: text containing the numeral (unicode in Python 2).
    :return: the value as a decimal ``str``; "0" when no numeral is found.
    """
    total = 0
    pending = None  # digits read since the last unit, not yet added to total
    for ch in hanzi:
        if ch in num_dict:
            digit = num_dict[ch]
        elif ch in u'0123456789':
            digit = int(ch)
        elif ch in unit_dict:
            # "pending or 1": a missing digit, or a placeholder zero, in
            # front of a unit means the unit is taken once.
            total += (pending or 1) * unit_dict[ch]
            pending = None
            continue
        else:
            continue
        pending = digit if pending is None else pending * 10 + digit
    total += pending or 0
    return str(total)

if __name__ == '__main__':
    dirs = './data/'
    files = get_all_files('./data')
    for file in files:
        tmp = re.findall('(.*?)章(.*)',file)
        if tmp:
            hanzi = tmp[0][0].decode('utf8')
            name = tmp[0][1].decode('utf8')
            num = hanzi2num(hanzi)
            newfile = num+name
            print file, 'to', newfile
            os.rename(dirs+file,dirs+newfile)
